1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 import java.nio.charset.*;
31 import java.nio.*;
32 import java.util.*;
33
34 public class TestEUC_TW {
35
36 static class Time {
37 long t;
38 }
39 static int iteration = 100;
40
41 static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t)
42 throws Exception {
43 String csn = cs.name();
44 CharsetDecoder dec = cs.newDecoder();
45 ByteBuffer bbf;
46 CharBuffer cbf;
47 if (testDirect) {
48 bbf = ByteBuffer.allocateDirect(bb.length);
49 cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
50 bbf.put(bb);
51 } else {
52 bbf = ByteBuffer.wrap(bb);
53 cbf = CharBuffer.allocate(bb.length);
54 }
55 CoderResult cr = null;
56 long t1 = System.nanoTime()/1000;
57 for (int i = 0; i < iteration; i++) {
58 bbf.rewind();
59 cbf.clear();
60 dec.reset();
61 cr = dec.decode(bbf, cbf, true);
62 }
63 long t2 = System.nanoTime()/1000;
64 if (t != null)
65 t.t = (t2 - t1)/iteration;
66 if (cr != CoderResult.UNDERFLOW) {
67 System.out.println("DEC-----------------");
68 int pos = bbf.position();
69 System.out.printf(" cr=%s, bbf.pos=%d, bb[pos]=%x,%x,%x,%x%n",
70 cr.toString(), pos,
71 bb[pos++]&0xff, bb[pos++]&0xff,bb[pos++]&0xff, bb[pos++]&0xff);
72 throw new RuntimeException("Decoding err: " + csn);
73 }
74 char[] cc = new char[cbf.position()];
75 cbf.flip(); cbf.get(cc);
76 return cc;
77
78 }
79
80 static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect)
81 throws Exception {
82 CharsetDecoder dec = cs.newDecoder();
83 ByteBuffer bbf;
84 CharBuffer cbf;
85 if (testDirect) {
86 bbf = ByteBuffer.allocateDirect(bb.length);
87 cbf = ByteBuffer.allocateDirect(bb.length*2).asCharBuffer();
88 bbf.put(bb).flip();
89 } else {
90 bbf = ByteBuffer.wrap(bb);
91 cbf = CharBuffer.allocate(bb.length);
92 }
93 return dec.decode(bbf, cbf, true);
94 }
95
96 static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t)
97 throws Exception {
98 ByteBuffer bbf;
99 CharBuffer cbf;
100 CharsetEncoder enc = cs.newEncoder();
101 String csn = cs.name();
102 if (testDirect) {
103 bbf = ByteBuffer.allocateDirect(cc.length * 4);
104 cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
105 cbf.put(cc).flip();
106 } else {
107 bbf = ByteBuffer.allocate(cc.length * 4);
108 cbf = CharBuffer.wrap(cc);
109 }
110 CoderResult cr = null;
111 long t1 = System.nanoTime()/1000;
112 for (int i = 0; i < iteration; i++) {
113 cbf.rewind();
114 bbf.clear();
115 enc.reset();
116 cr = enc.encode(cbf, bbf, true);
117 }
118 long t2 = System.nanoTime()/1000;
119 if (t != null)
120 t.t = (t2 - t1)/iteration;
121 if (cr != CoderResult.UNDERFLOW) {
122 System.out.println("ENC-----------------");
123 int pos = cbf.position();
124 System.out.printf(" cr=%s, cbf.pos=%d, cc[pos]=%x%n",
125 cr.toString(), pos, cc[pos]&0xffff);
126 throw new RuntimeException("Encoding err: " + csn);
127 }
128 byte[] bb = new byte[bbf.position()];
129 bbf.flip(); bbf.get(bb);
130 return bb;
131 }
132
133 static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect)
134 throws Exception {
135 ByteBuffer bbf;
136 CharBuffer cbf;
137 CharsetEncoder enc = cs.newEncoder();
138 if (testDirect) {
139 bbf = ByteBuffer.allocateDirect(cc.length * 4);
140 cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
141 cbf.put(cc).flip();
142 } else {
143 bbf = ByteBuffer.allocate(cc.length * 4);
144 cbf = CharBuffer.wrap(cc);
145 }
146 return enc.encode(cbf, bbf, true);
147 }
148
149 static char[] getEUC_TWChars(boolean skipNR) {
150
151 CharsetEncoder encOLD = new EUC_TW_OLD().newEncoder();
152 CharsetEncoder enc = Charset.forName("EUC_TW").newEncoder();
153 char[] cc = new char[0x20000];
154 char[] c2 = new char[2];
155 int pos = 0;
156 int i = 0;
157
158 for (i = 0; i < 0x10000; i++) {
159
160 if (skipNR && (i == 0x4ea0 || i == 0x51ab || i == 0x52f9))
161 continue;
162 if (encOLD.canEncode((char)i) != enc.canEncode((char)i)) {
163 System.out.printf(" Err i=%x: old=%b new=%b%n", i,
164 encOLD.canEncode((char)i),
165 enc.canEncode((char)i));
166 throw new RuntimeException("canEncode() err!");
167 }
168
169 if (enc.canEncode((char)i)) {
170 cc[pos++] = (char)i;
171 }
172 }
173
174
175 CharBuffer cb = CharBuffer.wrap(new char[2]);
176 for (i = 0x20000; i < 0x30000; i++) {
177 Character.toChars(i, c2, 0);
178 cb.clear();cb.put(c2[0]);cb.put(c2[1]);cb.flip();
179
180 if (encOLD.canEncode(cb) != enc.canEncode(cb)) {
181 throw new RuntimeException("canEncode() err!");
182 }
183
184 if (enc.canEncode(cb)) {
185
186 cc[pos++] = c2[0];
187 cc[pos++] = c2[1];
188 }
189 }
190
191 return Arrays.copyOf(cc, pos);
192 }
193
194 static void checkRoundtrip(Charset cs) throws Exception {
195 char[] cc = getEUC_TWChars(false);
196 System.out.printf("Check roundtrip <%s>...", cs.name());
197 byte[] bb = encode(cc, cs, false, null);
198 char[] ccO = decode(bb, cs, false, null);
199
200 if (!Arrays.equals(cc, ccO)) {
201 System.out.printf(" non-direct failed");
202 }
203 bb = encode(cc, cs, true, null);
204 ccO = decode(bb, cs, true, null);
205 if (!Arrays.equals(cc, ccO)) {
206 System.out.printf(" (direct) failed");
207 }
208 System.out.println();
209 }
210
211 static void checkInit(String csn) throws Exception {
212 System.out.printf("Check init <%s>...%n", csn);
213 Charset.forName("Big5");
214 long t1 = System.nanoTime()/1000;
215 Charset cs = Charset.forName(csn);
216 long t2 = System.nanoTime()/1000;
217 System.out.printf(" charset :%d%n", t2 - t1);
218 t1 = System.nanoTime()/1000;
219 cs.newDecoder();
220 t2 = System.nanoTime()/1000;
221 System.out.printf(" new Decoder :%d%n", t2 - t1);
222
223 t1 = System.nanoTime()/1000;
224 cs.newEncoder();
225 t2 = System.nanoTime()/1000;
226 System.out.printf(" new Encoder :%d%n", t2 - t1);
227 }
228
229 static void compare(Charset cs1, Charset cs2) throws Exception {
230 char[] cc = getEUC_TWChars(true);
231
232 String csn1 = cs1.name();
233 String csn2 = cs2.name();
234 System.out.printf("Diff <%s> <%s>...%n", csn1, csn2);
235
236 Time t1 = new Time();
237 Time t2 = new Time();
238
239 byte[] bb1 = encode(cc, cs1, false, t1);
240 byte[] bb2 = encode(cc, cs2, false, t2);
241
242 System.out.printf(" Encoding TimeRatio %s/%s: %d,%d :%f%n",
243 csn2, csn1,
244 t2.t, t1.t,
245 (double)(t2.t)/(t1.t));
246 if (!Arrays.equals(bb1, bb2)) {
247 System.out.printf(" encoding failed%n");
248 }
249
250 char[] cc2 = decode(bb1, cs2, false, t2);
251 char[] cc1 = decode(bb1, cs1, false, t1);
252 System.out.printf(" Decoding TimeRatio %s/%s: %d,%d :%f%n",
253 csn2, csn1,
254 t2.t, t1.t,
255 (double)(t2.t)/(t1.t));
256 if (!Arrays.equals(cc1, cc2)) {
257 System.out.printf(" decoding failed%n");
258 }
259
260 bb1 = encode(cc, cs1, true, t1);
261 bb2 = encode(cc, cs2, true, t2);
262
263 System.out.printf(" Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
264 csn2, csn1,
265 t2.t, t1.t,
266 (double)(t2.t)/(t1.t));
267
268 if (!Arrays.equals(bb1, bb2))
269 System.out.printf(" encoding (direct) failed%n");
270
271 cc1 = decode(bb1, cs1, true, t1);
272 cc2 = decode(bb1, cs2, true, t2);
273 System.out.printf(" Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
274 csn2, csn1,
275 t2.t, t1.t,
276 (double)(t2.t)/(t1.t));
277 if (!Arrays.equals(cc1, cc2)) {
278 System.out.printf(" decoding (direct) failed%n");
279 }
280 }
281
282
283 static byte[][] malformed = {
284
285 };
286
287 static void checkMalformed(Charset cs) throws Exception {
288 boolean failed = false;
289 String csn = cs.name();
290 System.out.printf("Check malformed <%s>...%n", csn);
291 for (boolean direct: new boolean[] {false, true}) {
292 for (byte[] bins : malformed) {
293 int mlen = bins[0];
294 byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
295 CoderResult cr = decodeCR(bin, cs, direct);
296 String ashex = "";
297 for (int i = 0; i < bin.length; i++) {
298 if (i > 0) ashex += " ";
299 ashex += Integer.toBinaryString((int)bin[i] & 0xff);
300 }
301 if (!cr.isMalformed()) {
302 System.out.printf(" FAIL(direct=%b): [%s] not malformed.\n", direct, ashex);
303 failed = true;
304 } else if (cr.length() != mlen) {
305 System.out.printf(" FAIL(direct=%b): [%s] malformed[len=%d].\n", direct, ashex, cr.length());
306 failed = true;
307 }
308 }
309 }
310 if (failed)
311 throw new RuntimeException("Check malformed failed " + csn);
312 }
313
314 static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
315 int inPos = flow[0];
316 int inLen = flow[1];
317 int outPos = flow[2];
318 int outLen = flow[3];
319 int expedInPos = flow[4];
320 int expedOutPos = flow[5];
321 CoderResult expedCR = (flow[6]==0)?CoderResult.UNDERFLOW
322 :CoderResult.OVERFLOW;
323 ByteBuffer bbf;
324 CharBuffer cbf;
325 if (direct) {
326 bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
327 cbf = ByteBuffer.allocateDirect((outPos + outLen)*2).asCharBuffer();
328 } else {
329 bbf = ByteBuffer.allocate(inPos + bytes.length);
330 cbf = CharBuffer.allocate(outPos + outLen);
331 }
332 bbf.position(inPos);
333 bbf.put(bytes).flip().position(inPos).limit(inPos + inLen);
334 cbf.position(outPos);
335 dec.reset();
336 CoderResult cr = dec.decode(bbf, cbf, false);
337 if (cr != expedCR ||
338 bbf.position() != expedInPos ||
339 cbf.position() != expedOutPos) {
340 System.out.printf("Expected(direct=%5b): [", direct);
341 for (int i:flow) System.out.print(" " + i);
342 System.out.println("] CR=" + cr +
343 ", inPos=" + bbf.position() +
344 ", outPos=" + cbf.position());
345 return false;
346 }
347 return true;
348 }
349
350 static void checkUnderOverflow(Charset cs) throws Exception {
351 String csn = cs.name();
352 System.out.printf("Check under/overflow <%s>...%n", csn);
353 CharsetDecoder dec = cs.newDecoder();
354 boolean failed = false;
355
356
357 byte[] bytes = new String("\u007f\u3000\u4e42\u4e28\ud840\udc55").getBytes("EUC_TW");
358 int inlen = bytes.length;
359
360 int MAXOFF = 20;
361 for (int inoff = 0; inoff < MAXOFF; inoff++) {
362 for (int outoff = 0; outoff < MAXOFF; outoff++) {
363 int[][] Flows = {
364
365
366 {inoff, inlen, outoff, 1, inoff + 1, outoff + 1, 1},
367 {inoff, inlen, outoff, 2, inoff + 3, outoff + 2, 1},
368 {inoff, inlen, outoff, 3, inoff + 7, outoff + 3, 1},
369 {inoff, inlen, outoff, 4, inoff + 11, outoff + 4, 1},
370 {inoff, inlen, outoff, 5, inoff + 11, outoff + 4, 1},
371 {inoff, inlen, outoff, 6, inoff + 15, outoff + 6, 0},
372
373 {inoff, 1, outoff, 6, inoff + 1, outoff + 1, 0},
374 {inoff, 2, outoff, 6, inoff + 1, outoff + 1, 0},
375 {inoff, 3, outoff, 6, inoff + 3, outoff + 2, 0},
376 {inoff, 4, outoff, 6, inoff + 3, outoff + 2, 0},
377 {inoff, 5, outoff, 6, inoff + 3, outoff + 2, 0},
378 {inoff, 8, outoff, 6, inoff + 7, outoff + 3, 0},
379 {inoff, 9, outoff, 6, inoff + 7, outoff + 3, 0},
380 {inoff, 10, outoff, 6, inoff + 7, outoff + 3, 0},
381 {inoff, 11, outoff, 6, inoff +11, outoff + 4, 0},
382 {inoff, 12, outoff, 6, inoff +11, outoff + 4, 0},
383 {inoff, 15, outoff, 6, inoff +15, outoff + 6, 0},
384
385 {inoff, 2, outoff, 1, inoff + 1, outoff + 1, 0},
386 {inoff, 3, outoff, 1, inoff + 1, outoff + 1, 1},
387 {inoff, 3, outoff, 2, inoff + 3, outoff + 2, 0},
388
389 {inoff, 4, outoff, 2, inoff + 3, outoff + 2, 0},
390 {inoff, 5, outoff, 2, inoff + 3, outoff + 2, 0},
391 {inoff, 6, outoff, 2, inoff + 3, outoff + 2, 0},
392 {inoff, 7, outoff, 2, inoff + 3, outoff + 2, 1},
393 {inoff, 7, outoff, 3, inoff + 7, outoff + 3, 0},
394
395 {inoff, 8, outoff, 3, inoff + 7, outoff + 3, 0},
396 {inoff, 9, outoff, 3, inoff + 7, outoff + 3, 0},
397 {inoff, 10, outoff, 3, inoff + 7, outoff + 3, 0},
398 {inoff, 11, outoff, 3, inoff + 7, outoff + 3, 1},
399 {inoff, 11, outoff, 4, inoff +11, outoff + 4, 0},
400
401 {inoff, 11, outoff, 4, inoff +11, outoff + 4, 0},
402 {inoff, 12, outoff, 4, inoff +11, outoff + 4, 0},
403 {inoff, 13, outoff, 4, inoff +11, outoff + 4, 0},
404 {inoff, 14, outoff, 4, inoff +11, outoff + 4, 0},
405 {inoff, 15, outoff, 4, inoff +11, outoff + 4, 1},
406 {inoff, 15, outoff, 5, inoff +11, outoff + 4, 1},
407 {inoff, 15, outoff, 6, inoff +15, outoff + 6, 0},
408 };
409 for (boolean direct: new boolean[] {false, true}) {
410 for (int[] flow: Flows) {
411 if (!check(dec, bytes, direct, flow))
412 failed = true;
413 }
414 }}}
415 if (failed)
416 throw new RuntimeException("Check under/overflow failed " + csn);
417 }
418
419 public static void main(String[] args) throws Exception {
420
421
422 checkInit("EUC_TW");
423 Charset euctw = Charset.forName("EUC_TW");
424 checkRoundtrip(euctw);
425 compare(euctw, new EUC_TW_OLD());
426 checkMalformed(euctw);
427 checkUnderOverflow(euctw);
428 }
429 }